import random
import time
import requests
import re
import pandas as pd

# HTTP headers sent with every review-list request.
# NOTE(review): 'User-Agent' and 'user-agent' are two distinct dict keys, but
# HTTP header names are case-insensitive, so only one of the two values can
# take effect on the wire -- TODO: drop whichever one is not intended.
headers = {
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'zh-CN,zh;q=0.3',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
    'Connection': 'keep-alive',
    # referer: the page from which the request to the server was initiated.
    'referer': 'https://item.taobao.com/item.htm?spm=a21n57.1.0.0.617f523cJ9FVK5&id=669786219574&ns=1&abbucket=0',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
    # The cookie value was missing entirely, which made the whole file a
    # SyntaxError. Left as an empty placeholder so no credential is committed.
    # TODO: paste the cookie string of a logged-in browser session here.
    'cookie': '',
}


# Captures the value of every "content" field in the raw response text.
_CONTENT_PATTERN = re.compile('"content":"(.*?)"')
# Finds an existing currentPageNum query parameter so its value can be replaced.
_PAGE_PARAM_PATTERN = re.compile(r'(?<=[?&])currentPageNum=[^&#]*')


def _page_url(url, page):
    """Return *url* with its ``currentPageNum`` query parameter set to *page*."""
    page_param = 'currentPageNum=' + str(page)
    paged_url, replaced = _PAGE_PARAM_PATTERN.subn(page_param, url, count=1)
    if replaced:
        return paged_url
    # The parameter is absent: append it with the right separator.
    return url + ('&' if '?' in url else '?') + page_param


def info(url):
    """Fetch review pages 1-10 for *url* and hand the extracted texts to ``deal``.

    The original implementation overwrote ``url`` with a hard-coded address on
    every iteration, so each caller-supplied URL scraped the same item. The
    passed URL is now used, with ``currentPageNum`` set per page.

    Args:
        url: Review-list URL to scrape. Assumed to be a ``feedRateList``-style
            URL paginated through ``currentPageNum`` -- TODO confirm against
            the ``comment_url`` column of the input spreadsheet. Values that
            are not non-empty strings (e.g. NaN from a blank cell) are skipped.

    Raises:
        requests.RequestException: If a request fails or times out.
    """
    if not isinstance(url, str) or not url.strip():
        print('Skipping invalid comment url: ' + repr(url))
        return

    comments = []
    for page in range(1, 11):
        # Random pause of 3-9 seconds before each request.
        time.sleep(random.randint(3, 9))
        # NOTE(review): verify=False disables TLS certificate verification; it
        # is kept from the original -- confirm whether it is really needed.
        # The timeout keeps a stalled connection from hanging the run forever.
        body = requests.get(
            _page_url(url, page), headers=headers, verify=False, timeout=30
        ).text
        print(body)
        comments.extend(_CONTENT_PATTERN.findall(body))
        print('第' + str(page) + '页爬取完毕')
    deal(comments)

def deal(data):
    """Append the collected review texts to ``./data/coms.csv``.

    Args:
        data: Review strings; they form the single column "评论" of the rows
            that are written.

    The rows are appended as UTF-8 without a header line or index column.
    """
    frame = pd.DataFrame({"评论": data})
    frame.to_csv(
        "./data/coms.csv",
        mode="a+",
        header=None,
        index=None,
        encoding="utf-8",
    )

if __name__ == '__main__':
    # Script entry point: read the 'comment_url' column of the input workbook
    # and scrape each listed URL in turn.
    comment_urls = pd.read_excel('./data/datatmsp1.xls')['comment_url'].values.tolist()
    for comment_url in comment_urls:
        info(comment_url)
